#!/u/bmaison/bin/perl5
#
# Greedy learner of prediction-correcting rules.
#
# Reads comma-separated records from $DATA_FILE.  Field 14 of each record is
# the true label; every record starts out with the prediction 1.  The script
# then repeats:
#
#   1. Tally, for every attribute value, how many records are currently
#      mispredicted ("good": changing the prediction to the truth would fix
#      them) and how many are currently correct ("bad": changing them would
#      break them).
#   2. Score every candidate rule as good minus bad.
#   3. Take the best-scoring rule, append it to $RULES_FILE and apply it to
#      the current predictions.
#
# It stops once the best score drops below 2, or no rule scores at least 1.
#
# A rule is written as "source:target:attribute:comparison:value", where
# comparison is one of eq / ls / gt and attribute is a field index (or two
# indices joined by "_" for a pair of fields).
#
# A progress trace is printed on STDOUT.  Hash keys are always visited in
# sorted order so that the trace and the choice among equally scored rules
# are reproducible from run to run.

use strict;
use warnings;

# Input records and the file that learned rules are appended to.
my $DATA_FILE  = "/u/mangu/C4.5/R8/iyiyData/two_new_new.data";
my $RULES_FILE = "rules";

# Record layout.  The role names in the comments below (cluster number,
# word, duration, ...) are taken from the variable names of the earlier
# version of this script -- TODO confirm against the data documentation.
my $TRUTH_FIELD     = 14;
my @PLAIN_ATTRS     = (0 .. 6, 9, 10, 12);   # used as they appear in the record
my @PERCENT_ATTRS   = (7, 8, 11);            # bucketed as int(value * 100)
my @PAIR_ATTRS      = ([1, 2], [3, 4]);      # two fields joined with "_"
my $PERCENT_BUCKETS = 100;                   # bucket thresholds run 0 .. 100

# Render a possibly-undefined tally for the trace (undef prints as nothing).
sub shown {
    my ($count) = @_;
    return defined $count ? $count : "";
}

# Read the data file and return a reference to a list of records, each a
# reference to the list of its fields.  Blank lines and records too short to
# carry a truth label are skipped (the latter with a warning).
sub read_data {
    my ($path) = @_;

    open(my $in, '<', $path) or die "I can't open $path: $!\n";

    my @rows;
    while (my $line = <$in>) {
        chomp $line;    # chomp, not chop: the last line may lack a newline
        next unless $line =~ /\S/;

        my @fields = split /,\s*/, $line;
        if (@fields <= $TRUTH_FIELD) {
            warn "$path line $.: fewer than ", $TRUTH_FIELD + 1,
                 " fields, skipped\n";
            next;
        }
        push @rows, \@fields;
    }
    close($in);

    return \@rows;
}

# Map one record to { attribute id => value used for tallying }.
sub feature_values {
    my ($row) = @_;

    my %value_of;
    $value_of{$_} = $row->[$_] for @PLAIN_ATTRS;
    $value_of{$_} = int($row->[$_] * 100) for @PERCENT_ATTRS;
    for my $pair (@PAIR_ATTRS) {
        $value_of{ join("_", @$pair) } = join("_", map { $row->[$_] } @$pair);
    }
    return \%value_of;
}

# Build the two tally tables for the current predictions:
#   $bad->{attr}{value}{label}    records predicted correctly as label
#   $good->{attr}{value}{change}  mispredicted records, change = "pred:truth"
# Every change seen is also recorded in %$rule_type, which the caller keeps
# across iterations.
sub tally_counts {
    my ($features, $truth, $prediction, $rule_type) = @_;

    my (%good, %bad);
    for my $i (0 .. $#$features) {
        my $value_of = $features->[$i];
        if ($truth->[$i] == $prediction->[$i]) {
            # Changing this prediction would be harmful.
            $bad{$_}{ $value_of->{$_} }{ $truth->[$i] }++ for keys %$value_of;
        }
        else {
            my $change = "$prediction->[$i]:$truth->[$i]";
            $rule_type->{$change} = 1;
            $good{$_}{ $value_of->{$_} }{$change}++ for keys %$value_of;
        }
    }
    return (\%good, \%bad);
}

# Net benefit of applying $change to all records whose attribute $attr has
# the given value: records fixed minus correct records broken.
sub net_gain {
    my ($good, $bad, $change, $pred, $attr, $value) = @_;
    return ($good->{$attr}{$value}{$change} || 0)
         - ($bad->{$attr}{$value}{$pred}    || 0);
}

# Given per-value net gains $net->[0 .. last], return two hash references:
#   ls: i => sum of $net->[j] for j <  i, for i in $first_ls .. last
#   gt: i => sum of $net->[j] for j >= i, for i in 0 .. last - 1
# An "ls" entry whose range is empty (i == 0) is left out.
sub threshold_sums {
    my ($net, $first_ls) = @_;
    my $last = $#$net;

    # $below[$i] is the sum of $net->[0 .. $i - 1].
    my @below = (0);
    push @below, $below[-1] + $net->[$_] for 0 .. $last;

    my (%ls, %gt);
    for my $i ($first_ls .. $last) {
        next if $i == 0;
        $ls{$i} = $below[$i];
    }
    for my $i (0 .. $last - 1) {
        $gt{$i} = $below[ $last + 1 ] - $below[$i];
    }
    return (\%ls, \%gt);
}

# Store and trace the "ls" and "gt" rule scores derived from @$net.
sub score_thresholds {
    my ($score, $change, $attr, $net, $first_ls) = @_;

    my $ls_rule = join(":", $change, $attr, "ls");
    my $gt_rule = join(":", $change, $attr, "gt");
    my ($ls, $gt) = threshold_sums($net, $first_ls);
    my $last = $#$net;

    for my $i ($first_ls .. $last) {
        $score->{$ls_rule}{$i} = $ls->{$i} if exists $ls->{$i};
        print $ls_rule, "\t", $i, "\t", shown($ls->{$i}), "\n";
    }
    for my $i (reverse 0 .. $last - 1) {
        $score->{$gt_rule}{$i} = $gt->{$i};
        print $gt_rule, "\t", $i, "\t", $gt->{$i}, "\n";
    }
    return;
}

# Score "eq" rules for the listed values of one attribute; returns the rule
# name under which the scores were stored.
sub score_eq_attr {
    my ($score, $good, $bad, $change, $pred, $attr, $values) = @_;

    my $rule = join(":", $change, $attr, "eq");
    for my $value (@$values) {
        $score->{$rule}{$value} =
            net_gain($good, $bad, $change, $pred, $attr, $value);
    }
    return $rule;
}

# Score eq / ls / gt rules for an attribute taking the values 0 .. $max
# (assumes integer values -- TODO confirm for the duration fields).
sub score_integer_attr {
    my ($score, $good, $bad, $change, $pred, $attr, $max) = @_;

    my $eq_rule = join(":", $change, $attr, "eq");
    my @net;
    for my $i (0 .. $max) {
        $net[$i] = net_gain($good, $bad, $change, $pred, $attr, $i);
        $score->{$eq_rule}{$i} = $net[$i];
        print $eq_rule, "\t", $i, "\t",
              shown($good->{$attr}{$i}{$change}), " - ",
              shown($bad->{$attr}{$i}{$pred}), " = ", $net[$i], "\n";
    }
    score_thresholds($score, $change, $attr, \@net, 1);
    return;
}

# Score ls / gt rules for an attribute bucketed into 0 .. $PERCENT_BUCKETS.
sub score_percent_attr {
    my ($score, $good, $bad, $change, $pred, $attr) = @_;

    my @net = map { net_gain($good, $bad, $change, $pred, $attr, $_) }
              0 .. $PERCENT_BUCKETS;
    score_thresholds($score, $change, $attr, \@net, 0);
    return;
}

# Score every candidate rule for one change ("pred:truth"), in a fixed
# attribute order so that the trace is stable.
sub score_change {
    my ($score, $good, $bad, $change, $max_of, $values_of) = @_;

    my ($pred) = split /:/, $change;
    my @tally = ($good, $bad, $change, $pred);

    score_integer_attr($score, @tally, 0, $max_of->{0});

    # Word identities and word pairs: equality rules only, no trace.
    score_eq_attr($score, @tally, $_, $values_of->{$_}) for (1, 2, "1_2");

    # Epsilon flags and their pair.
    score_eq_attr($score, @tally, $_, [ "yes", "no" ]) for (3, 4);
    my $eps_rule = score_eq_attr($score, @tally, "3_4", $values_of->{"3_4"});
    for my $value (@{ $values_of->{"3_4"} }) {
        print $value, "\t",
              shown($good->{"3_4"}{$value}{$change}), " - ",
              shown($bad->{"3_4"}{$value}{$pred}),
              " = ", $score->{$eps_rule}{$value}, "\n";
    }

    score_integer_attr($score, @tally, $_, $max_of->{$_}) for (5, 6);
    score_percent_attr($score, @tally, $_) for (7, 8);

    # Attribute 9 is only ever tested against the values 2 and 3.
    my $new_cand_rule = score_eq_attr($score, @tally, 9, [ 2, 3 ]);
    for my $i (2, 3) {
        print $new_cand_rule, "\t", $i, "\t",
              shown($good->{9}{$i}{$change}), " - ",
              shown($bad->{9}{$i}{$pred}), "\n";
    }

    score_integer_attr($score, @tally, 10, $max_of->{10});
    score_percent_attr($score, @tally, 11);
    score_integer_attr($score, @tally, 12, $max_of->{12});
    return;
}

# Return ("rule:value", score) for the best-scoring rule, or ("", 1) when no
# rule scores at least 1.  Among equal scores the last one in sorted order
# wins.  Every new best is traced.
sub pick_best_rule {
    my ($score) = @_;

    my ($best_rule, $best_score) = ("", 1);
    for my $rule (sort keys %$score) {
        for my $value (sort keys %{ $score->{$rule} }) {
            my $gain = $score->{$rule}{$value};
            next unless $gain >= $best_score;
            print $rule, "\t", $value, "\t", $gain, "\n";
            ($best_rule, $best_score) = ($rule . ":" . $value, $gain);
        }
    }
    return ($best_rule, $best_score);
}

# Apply "source:target:attribute:comparison:value" to @$prediction: every
# record currently predicted as source whose attribute satisfies the
# comparison is switched to target.
sub apply_rule {
    my ($best_rule, $rows, $prediction) = @_;

    # Limit 5 keeps a value that itself contains ":" in one piece.
    my ($source, $target, $attribute, $comparison, $value) =
        split /:/, $best_rule, 5;

    # Percent attributes were scored on int(value * 100) buckets.
    $value = $value / 100 if grep { $attribute eq $_ } @PERCENT_ATTRS;

    my $matches;
    if ($comparison eq "eq") {
        if ($attribute =~ /_/) {
            if ($value !~ /_/) {
                print "ERROR $attribute $value\n";
                return;
            }
            my @field = split /_/, $attribute;
            my @want  = split /_/, $value, 2;
            $matches = sub {
                my ($row) = @_;
                return $row->[ $field[0] ] eq $want[0]
                    && $row->[ $field[1] ] eq $want[1];
            };
        }
        else {
            $matches = sub { $_[0][$attribute] eq $value };
        }
    }
    elsif ($comparison eq "ls") {
        # NOTE(review): "ls" scores are summed over values strictly below
        # the threshold, but the rule is applied with <=.  Kept as is
        # because the rules file may be read by other tools -- confirm
        # which of the two is intended.
        $matches = sub { $_[0][$attribute] <= $value };
    }
    elsif ($comparison eq "gt") {
        $matches = sub { $_[0][$attribute] >= $value };
    }
    else {
        die "ERROR: unknown comparison $comparison!\n";
    }

    for my $i (0 .. $#$rows) {
        next unless $matches->($rows->[$i]) && $prediction->[$i] eq $source;
        $prediction->[$i] = $target;
        print "I changed $source to $target for $i\n";
    }
    return;
}

# Entry point: load the data, then learn and apply rules until none helps.
sub main {
    my $rows = read_data($DATA_FILE);

    open(my $rules_out, '>>', $RULES_FILE)
        or die "I can't open $RULES_FILE: $!\n";

    # Initial state, the value sets to enumerate and the largest value seen
    # for each integer attribute.
    my (@truth, @prediction, @features);
    my (%word1, %word2, %pair, %epspair);
    my %max_of  = map { $_ => 0 } (0, 10, 12);
    my $max_dur = 0;    # shared by fields 5 and 6

    for my $row (@$rows) {
        push @truth,      $row->[$TRUTH_FIELD];
        push @prediction, 1;
        push @features,   feature_values($row);

        $word1{ $row->[1] } = 1;
        $word2{ $row->[2] } = 1;
        $pair{ $row->[1] . "_" . $row->[2] }++;
        $epspair{ $row->[3] . "_" . $row->[4] } = 1;

        for my $attr (0, 10, 12) {
            $max_of{$attr} = $row->[$attr] if $row->[$attr] > $max_of{$attr};
        }
        for my $field (5, 6) {
            $max_dur = $row->[$field] if $row->[$field] > $max_dur;
        }
    }
    $max_of{5} = $max_of{6} = $max_dur;

    my %values_of = (
        "1"   => [ sort keys %word1 ],
        "2"   => [ sort keys %word2 ],
        "1_2" => [ sort keys %pair ],
        "3_4" => [ sort keys %epspair ],
    );

    # Report the frequent word pairs.
    for my $key (sort keys %pair) {
        print $key, "\t", $pair{$key}, "\n" if $pair{$key} >= 10;
    }

    my %rule_type;    # every "pred:truth" change seen so far
    my $iteration  = 0;
    my $best_score = 2;

    while ($best_score >= 2) {
        $iteration++;

        my ($good, $bad) =
            tally_counts(\@features, \@truth, \@prediction, \%rule_type);

        my %score;
        for my $change (sort keys %rule_type) {
            print $change, "\n";
            score_change(\%score, $good, $bad, $change,
                         \%max_of, \%values_of);
        }

        print "OUT OF HERE\n";

        my $best_rule;
        ($best_rule, $best_score) = pick_best_rule(\%score);
        last if $best_rule eq "";    # nothing scores at least 1

        print "ITERATION: $iteration\n";
        print "BEST SCORE $best_score\n";
        print "BEST RULE $best_rule\n";
        print {$rules_out} "$best_rule $best_score\n";

        apply_rule($best_rule, $rows, \@prediction);
    }

    close($rules_out) or die "I can't write $RULES_FILE: $!\n";
    return;
}

# Run only when executed as a script, so the helpers can be loaded alone.
main() unless caller;